# coding=utf-8

"""
全国 地区地址码
用途：菜鸟驿站需求参数
"""

import requests
import xlwt
from lxml import etree


class SerialNumber(object):
    """Scrape district address codes from a web page and export them to Excel.

    The page at ``self.url`` is downloaded, every data row of its HTML table
    is read as a (code, region) pair, and the pairs are written to the
    "编号信息" sheet of an ``xlwt`` workbook saved at ``OUTPUT_PATH``.
    """

    # Destination of the exported workbook (relative to the working directory).
    OUTPUT_PATH = './全国各区地址码信息.xls'
    # Header cells written to row 0 of the sheet.
    TITLES = ('编号', '地区')
    # Sheet column used for each key of a row dict passed to ``save``.
    COLUMN_INDEX = {'number': 0, 'region': 1}

    def __init__(self):
        # Workbook and the single sheet that receives the scraped rows.
        self.workbook = xlwt.Workbook(encoding="utf-8", style_compression=0)
        self.sheet_1 = self.workbook.add_sheet("编号信息", cell_overwrite_ok=0)
        self.hang = 1  # next sheet row to fill; row 0 is used for the titles

        self.url = 'https://www.cnblogs.com/sunbingqiang/p/9273362.html'  # page to scrape
        self.header = {
                "authority": "www.cnblogs.com",
                "method": "GET",
                "path": "/sunbingqiang/p/9273362.html",
                "scheme": "https",
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
        }

    def send(self, url, header, retries=3):
        """Download ``url`` and return the response body decoded as UTF-8.

        Args:
            url: Address to request.
            header: Mapping of HTTP request headers.
            retries: Maximum number of attempts before giving up.

        Returns:
            The decoded page text (undecodable bytes are dropped), or
            ``None`` when every attempt failed.
        """
        for _ in range(retries):
            try:
                # NOTE(review): verify=False disables TLS certificate
                # verification -- confirm this is intentional.
                rep = requests.get(url, headers=header, verify=False, timeout=20)
                # Treat HTTP error statuses as failures so that an error page
                # is retried instead of being parsed as data.
                rep.raise_for_status()
            except requests.RequestException as e:
                print(e)
                continue
            return rep.content.decode('utf-8', 'ignore')
        return None

    def deal_with(self):
        """Fetch the page, extract every (code, region) row and export them.

        Rows whose first two cells are missing, or whose code is not an
        integer, are skipped. The workbook is written to disk once, after
        all rows were collected, and only if at least one row was found.

        Raises:
            RuntimeError: If the page could not be downloaded.
        """
        html = self.send(self.url, self.header)
        if not html:
            raise RuntimeError('failed to fetch {}'.format(self.url))

        for col, title in enumerate(self.TITLES):
            self.sheet_1.write(0, col, title)

        html_obj = etree.HTML(html)
        written = 0
        # The first <tr> is skipped -- presumably the table's own header row.
        for tr in html_obj.xpath('//tbody/tr')[1:]:
            number_text = tr.xpath('./td[1]/text()')
            region_text = tr.xpath('./td[2]/text()')
            if not number_text or not region_text:
                continue  # row lacks a code or a region cell
            try:
                number = int(number_text[0])
            except ValueError:
                continue  # code cell does not hold an integer

            # Defer the disk write: saving per row rewrites the whole file.
            self.save({'number': number, 'region': region_text[0]}, flush=False)
            written += 1

        if written:
            self.workbook.save(self.OUTPUT_PATH)

    def save(self, data, flush=True):
        """Write one row to the sheet and advance the row cursor.

        Args:
            data: Dict with the keys ``'number'`` and/or ``'region'``; each
                value is written to the column given by ``COLUMN_INDEX``.
            flush: When true (the default), the workbook is saved to
                ``OUTPUT_PATH`` right after the row is written. Pass
                ``False`` when writing many rows and saving once afterwards.
        """
        for key, value in data.items():
            self.sheet_1.write(self.hang, self.COLUMN_INDEX[key], value)

        self.hang += 1
        if flush:
            self.workbook.save(self.OUTPUT_PATH)


if __name__ == '__main__':
    # Script entry point: scrape the address codes and export them to Excel.
    SerialNumber().deal_with()
